/*
CREATE FIRM LEVEL DATA 

Data 	: 
Folder 	: 
Date	: 2017-11-15

Creator		: 	Jonas Cederlof	(JC)
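Description 	:	Builds a firm-year dataset of firm characteristics (number of employed, number of plants, earnings, average age, education and gender shares) from the yearly Jobb files, and a firm-year dataset with the mean and median of manl from the wage survey data.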

Notes		  : 

LATEST UPDATE : 2018-11-22

*/

********************************************************************************

* Start from a clean session: empty memory, no -more- pauses, no open logs
clear
set more off
capture log close _all

* Write the session log (overwritten on every run)
log using "../log/A1_clean_firmchar.log", replace


{ // Stack the yearly Jobb files (2000-2018) and the prepared 2019 file
*===============================================================================

* Variables read from each raw yearly file
local jobbvars lopnr* year lonfink yrkstallnku astsni*

* Start from the year-2000 file
use `jobbvars' using "$rawdatapath/jobb_2000.dta", clear

* Add the years 2001-2018 below it, keeping the same variables
forvalues yr = 2001/2018 {
	append using "$rawdatapath/jobb_`yr'.dta", keep(`jobbvars')
}

* 2019 comes from a separately prepared file and is appended with all of its variables
append using "$datapath/A0_jobb2019.dta"
}
*
{ // Rename the id variables
*===============================================================================
* lopnr -> persid, lopnr_peorgnr -> firmid, lopnr_cfarnr -> plantid
rename (lopnr lopnr_peorgnr lopnr_cfarnr) (persid firmid plantid)

}
*

{ // Drop e.g. self-employed and sea-workers
*===============================================================================
* NOTE: the filters on yrkstallnku below are disabled (commented out), so no
* observations are dropped in this section.
*drop if yrkstallnku=="1"
*drop if yrkstallnku=="4"
*drop if yrkstallnku=="5"
}
*

* A few individuals have multiple registrations at the same firm in the same year
gduplicates report persid firmid  year

* Collapse to unique persid-firmid-year combinations: earnings are summed, while
* astsni* and plantid are taken from the last row within each combination.
* NOTE(review): (last) is meant to pick the highest-lonfink row, so it relies on
* fcollapse keeping the sort order within groups; rows with missing lonfink sort last.
sort persid year  firmid lonfink // within persid-year-firmid, the highest-earnings registration (plant) comes last
fcollapse (last) astsni* plantid (sum) lonfink , by(persid firmid year) fast

{ // Merge on characteristics of individuals
*===============================================================================
{ // Merge on date of birth and compute age
*===============================================================================
* Keep every worker row; discard people that only appear in the birth-date file
merge m:1 persid using "$datapath/A0_fodelsedatum.dta", keep(master match) nogenerate

* Age = calendar year minus birth year (becomes the firm average when collapsed)
* NOTE(review): dofm() assumes birth is stored as a Stata monthly date; confirm
gen firm_avg_age = year - year(dofm(birth))
drop birth
}
*
{ // Merge on education level and gender
*===============================================================================
* Keep every worker row; discard person-years that only appear in the LISA file
merge m:1 persid year using "$datapath/A0_LISAind_2000_2018.dta", keepusing(utbniv female) keep(master match) nogenerate
}

* Indicator for high education (utbniv>=5); becomes a share when collapsed.
* Missing utbniv (for example rows with no match in the LISA file) must stay
* missing: Stata treats missing as larger than any number, so an unguarded
* utbniv>=5 would code those rows as 1 and inflate the share.
gen firm_share_higheduc = utbniv>=5 if !missing(utbniv)

}
*

* Flag exactly one row per firm-plant-year, so that summing the flag within a
* firm-year in the collapse below gives the number of plants at the firm
bys firmid plantid year : gen noofplants = 1 if _n==_N


* Collapse to firm-year level
* NOTE(review): (first) astsni* takes the values from whichever row comes first
* within the firm-year after the sort above; confirm this is the intended code
fcollapse	(first) astsni*						///
		(count) persid						///
		(sum)   noofplants firm_total_earnings=lonfink		///
		(mean)  lonfink firm_share_higheduc firm_share_female=female firm_avg_age , ///
		by(firmid year) fast

* After the collapse persid holds the head count and noofplants the plant count
gen firm_avgplantsize = persid/noofplants
rename lonfink 			firm_avg_earnings
rename persid 			firm_noemployed_RAMS
rename noofplants		firm_numberofplants

* Scale the wage bill up by 30 percent
* NOTE(review): the 0.3 mark-up presumably approximates employer social
* contributions (see the variable label); confirm the rate
replace firm_total_earnings = firm_total_earnings + firm_total_earnings*.3

lab var firm_noemployed_RAMS		"Number of employed at firm in a year"
lab var firm_numberofplants		"Number of plants at firm"
lab var firm_avgplantsize 		"Average size of plant at firm"
lab var firm_avg_earnings		"Average earnings at firm"
lab var firm_share_female		"Share of females at firm"
lab var firm_avg_age			"Average age at firm"
lab var	firm_share_higheduc		"Share of college educated at firm"
lab var firm_total_earnings		"Total wage payments + social contributions"
compress
save "$datapath/A1_firm_char.dta",replace



{ // Firm-year mean and median of manl from the wage survey data
*===============================================================================
use "$datapath/A1_clean_wagesurvey_00_19.dta", clear

* One row per year-firm holding the mean and the median of manl
gcollapse	(mean)   firm_mean_manl=manl		///
		(median) firm_median_manl=manl ,	///
		by(year firmid)

compress
save "$datapath/A1_firm_wages.dta", replace

}
*	
